List of sections:

  1. Load data and perform exclusions S1
  2. Basic demographic and behavioral data S2
  3. Create dissimilarity matrices for each subject S3
  4. Create average dissimilarity matrix for entire sample S4
  5. Heatmaps of per-state ratings and averaged dissimilarity barplots S5
  6. Stress values with increasing dimensionality S6
  7. 2D Multidimensional scaling S7
  8. 3D Multidimensional scaling S8
  9. Baseline-centered polar coordinates model S9
  10. Prepare data for Python-based models - additional exclusions S11

 

1. Load data and perform exclusions

# Load required packages
library(readxl)
library(tidyr)
library(dplyr)
library(ggplot2)
library(smacof)
library(plotly)
library(MASS)
library(fitdistrplus)
library(extremevalues)
library(here)
library(patchwork)
library(scatterplot3d)
library(rgl)
library(gridExtra)
library(viridis)

# Set working directory
mds_dir <- here::here()

# Load data
df_full  <- read.csv(file = paste0(mds_dir,"/MDS_ASC_data"), sep = "\t", header = TRUE, quote = "\"")

#Basic demographic statistics before filtering
df_full %>%
  summarise(
    Number_of_Participants = n(),
    Mean_Age = mean(Age, na.rm = TRUE),
    SD_Age = sd(Age, na.rm = TRUE),
    Male_Participants = sum(Gender == "Man"),
    Female_Participants = sum(Gender == "Woman"),
    Other_Participants = sum(Gender == "Other")
  ) %>%
  print()
##   Number_of_Participants Mean_Age   SD_Age Male_Participants
## 1                    785 30.10828 8.328384               412
##   Female_Participants Other_Participants
## 1                 359                 14
## Perform exclusions according to the preregistered criteria

#(1) Average response time per dissimilarity rating below 2 seconds
#hist(df_full$time_per_subs, breaks = 100)
cat("Response time above 2s:", sum(df_full$time_per_subs < 2, na.rm = TRUE), "\n")
## Response time above 2s: 0
df_filtered <- filter(df_full,  time_per_subs > 2)


#(2) Inaccurate responses to the two control questions

# Test 1
#hist(df_filtered$test1, breaks = 100, xlim = c(0,100), ylim = c(0,50))
cat("Uncorrect response to test1:", sum(df_filtered$test1 < 99, na.rm = TRUE), "\n")
## Uncorrect response to test1: 10
df_filtered <- filter(df_filtered,  test1 >= 99) 

# Test2
#hist(df_filtered$test2, breaks = 100, xlim = c(0,7), ylim = c(0,100))
cat("Uncorrect response to test2:", sum(df_filtered$test2 > 1, na.rm = TRUE), "\n")
## Uncorrect response to test2: 36
df_filtered <- filter(df_filtered,  test2 <= 1) # liberal (2nd step)

# Recode ID variable
df_filtered$ID <- 1:length(df_filtered$ID)

#Basic demographic statistics after filtering
df_filtered %>%
  summarise(
    Number_of_Participants = n(),
    Male = sum(Gender == "Man"),
    Female = sum(Gender == "Woman"),
    Other = sum(Gender == "Other"),
    Mean_Age = mean(Age, na.rm = TRUE),
    SD_Age = sd(Age, na.rm = TRUE),
  ) %>%
  print()
##   Number_of_Participants Male Female Other Mean_Age   SD_Age
## 1                    739  385    340    14 29.82409 8.087188


 

2. Basic demographic and behavioral data

# Summary statistics and histogram for age
summary(df_filtered$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   24.00   28.00   29.82   34.00   56.00
ggplot(df_filtered, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill = "#21918c", color = "black", alpha = 0.7) +
  labs(title = "Age Distribution",
       x = "Age",
       y = "Frequency")+
  theme_minimal()

# Summary statistics and barplot for gender distribution
df_filtered$Gender <- factor(df_filtered$Gender, levels = c("Woman", "Man", "Other"))

table(df_filtered$Gender)
## 
## Woman   Man Other 
##   340   385    14
ggplot(df_filtered, aes(x = Gender, fill = Gender)) +
  geom_bar() +
  scale_fill_viridis_d() +  # Add this line to use viridis colors
  labs(title = "Gender Distribution",
       x = "Gender",
       y = "Count") +
  theme_minimal()

#convert education to factor, set levles
df_filtered$Education <- factor(df_filtered$Education, 
                                           levels = c("Primary School",
                                                      "Middle School",
                                                      "Vocational",
                                                      "High School",
                                                      "Bachelor's",
                                                      "Master's",
                                                      "PhD and higher"))
# Summary statistics for and barplot for education level
table(df_filtered$Education)
## 
## Primary School  Middle School     Vocational    High School     Bachelor's 
##              5             14             16            260            156 
##       Master's PhD and higher 
##            264             24
ggplot(df_filtered, aes(x = Education, fill = Education)) +
  geom_bar() +
  scale_fill_viridis_d() + 
  labs(title = "Education Distribution",
       x = "Education",
       y = "Count")+
  theme(axis.text.x = element_text(size = 10, angle = 35, hjust = 1))+
  theme_minimal()

#convert place of residence to factor, set levles
df_filtered$Residence <- factor(df_filtered$Residence, 
                                           levels = c("Village",
                                                      "City of up to 50k",
                                                      "City 50 to 150k",
                                                      "City 150 to 500k",
                                                      "City of over 500k"))

# Summary statistics for and plot for place of residence
table(df_filtered$Residence)
## 
##           Village City of up to 50k   City 50 to 150k  City 150 to 500k 
##                63               107                88               115 
## City of over 500k 
##               366
ggplot(df_filtered, aes(x = Residence, fill = Residence)) +
  geom_bar() +
  labs(title = "Place of Residence Distribution",
       x = "Place of Residence",
       y = "Count")+
  scale_fill_viridis_d() + 
  theme(axis.text.x = element_text(size = 10, angle = 35, hjust = 1))+
  theme_minimal()

# Summary statistics and plot for number of compared substances
summary(df_filtered$Subs_N)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.000   5.000   7.000   7.491   9.000  15.000
ggplot(df_filtered, aes(x = Subs_N)) +
  geom_histogram(binwidth = 1, fill = "#21918c", color = "black", alpha = 0.7) +
  labs(title = "Distribution of number of compared substances",
       x = "Number of compared substances",
       y = "Number of participants")+
    scale_x_continuous(breaks = 4:15)+
  theme_minimal()

# Summary statistics and barplot for history of psychiatric diagnosis
table(df_filtered$EverDiagnosis)
## 
##  No Yes 
## 471 268
ggplot(df_filtered, aes(x = EverDiagnosis, fill = EverDiagnosis)) +
  geom_bar() +
  labs(title = "History of Psychiatric Diagnosis",
      x = "Ever diagnosis",
       y = "Count")+
  scale_fill_viridis_d() + 
  theme(axis.title = element_text(size = 12), 
        plot.title = element_text(size = 12),
        legend.title = element_text(size = 12))+
  theme_minimal()

# Summary statistics and barplot for History of use of drugs with psychoactive effects
table(df_filtered$Medication)
## 
##  No Yes 
## 339 400
ggplot(df_filtered, aes(x = Medication, fill = Medication)) +
  geom_bar() +
  labs(title = "History of use of drugs with psychoactive effects",
       x = "Ever Medication",
       y = "Count")+
  scale_fill_viridis_d() + 
  theme(axis.title = element_text(size = 12), 
        plot.title = element_text(size = 12),
        legend.title = element_text(size = 12))+
  theme_minimal()


 

3. Create dissimilarity matrices for each subject

# Create a sample data frame
substance_codes <- c("Baseline", "Alc", "MJ", "MDMA", "Amf", "LSD", "Psy", "Mef", "Coc", 
                     "Alp", "Ket", "DMT", "N2O", "DXM", "Cod", "Tra", "Her", "Salv", 
                     "GHB", "Dat", "Ben", "2CB", "Diph")

# adjust name column name
colnames(df_filtered) <- gsub("X2CB", "2CB", colnames(df_filtered))

# Create all combinations of substance codes
combinations <- expand.grid(Var1 = substance_codes, Var2 = substance_codes, stringsAsFactors = FALSE)
combinations <- subset(combinations, Var1 != Var2)
combinations <- subset(combinations, Var1 < Var2)
column_names <- apply(combinations, 1, function(x) paste(x, collapse = "_"))


# Loop through each subject's data
for (i in as.numeric(df_filtered$ID)) {
  subject_id <- i
  
  # Extract data for the current subject
  subject_data <- df_filtered[df_filtered$ID == subject_id, ]
  
  # Create a data frame with substance codes as both rows and columns
  subject_df <- data.frame(matrix(NA, nrow = length(substance_codes), ncol = length(substance_codes),dimnames = list(substance_codes, substance_codes)))
  
colnames(subject_df) <- substance_codes
rownames(subject_df) <- substance_codes
  
  # Populate the data frame with values from the corresponding columns
  for (code1 in substance_codes) {
    for (code2 in substance_codes) {
      col_name1 <- paste(code1, code2, sep = "")
      col_name2 <- paste(code2, code1, sep = "")
      if (col_name1 != col_name2) {
        
        if (!is.na(subject_data[[col_name1]])) {
          subject_df[code1, code2] <- subject_data[[col_name1]]
          subject_df[code2, code1] <- subject_data[[col_name1]]
          
        } else if (!is.na(subject_data[[col_name2]])) {
          subject_df[code1, code2] <- subject_data[[col_name2]]
          subject_df[code2, code1] <- subject_data[[col_name2]]
        }
      }
    }
  }
  
  # Assign the subject's data frame to the global environment with a unique name
  assign(paste("df_", subject_id, sep = ""), subject_df, envir = .GlobalEnv)
}

# Remove redundant file
rm(subject_df)
rm(subject_data)

# Create a list of all individual data frames
list_df <- lapply(as.numeric(df_filtered$ID), function(i) {
  df_name <- paste("df_", i, sep = "")
  if (exists(df_name, envir = .GlobalEnv)) {
    get(df_name)
  } else {
    NULL
  }
})


 

4. Create average dissimilarity matrix for entire sample

## Derive averaged dissimilarity ratings and number of comparisons

# Initialization of empty arrays
comparisons_n <- matrix(0, nrow = ncol(list_df[[1]]), ncol = ncol(list_df[[1]]))
dt_23 <- comparisons_n

# Calculating average values and number of comparisons
for (i in 1:length(list_df)) {
  df <- list_df[[i]]
  comparisons_n[!is.na(df)] <- comparisons_n[!is.na(df)] + 1
  dt_23[!is.na(df)] <- dt_23[!is.na(df)] + as.numeric(df[!is.na(df)])
}
dt_23 <- dt_23 / comparisons_n

# Adding names to rows and columns
colnames(dt_23) <- substance_codes
rownames(dt_23) <- substance_codes
colnames(comparisons_n) <- substance_codes
rownames(comparisons_n) <- substance_codes

# Save the final matrix
dt <- dt_23
rm(df)

# Remove states without the expected number of obtained ratings (Diphenidine,Datura,Benzydamine)
dt <- dt[!(rownames(dt) %in% c("Diph", "Dat","Ben")), ]
dt <- dt[, !(colnames(dt) %in% c("Diph", "Dat","Ben"))]


## Derive additional descriptive varialbes (proportion of ratings & average dissimilarity rating)

# Initialization of empty data frame
descriptive_df <- data.frame(ID = numeric(0), Proportion_of_ratings = numeric(0), Average_dissimilarity = numeric(0))

# Loop through each element in list_df
for (i in seq_along(list_df)) {
  
  # Extract the current dataframe
  current_df <- list_df[[i]]
  current_df <- as.data.frame(lapply(current_df, as.numeric))
  
  # Calculate Proportion_of_ratings
  prop_ratings <- sum(!is.na(current_df)) / (length(current_df) * length(current_df))
  
  # Calculate Average_dissimilarity
  avg_distance <- mean(as.matrix(current_df), na.rm = TRUE)
  
  # Append the results to the descriptive_df
  descriptive_df <- rbind(descriptive_df, data.frame(ID = i, Proportion_of_ratings = prop_ratings, Average_dissimilarity = avg_distance))
}

rm(current_df)
## Optional: remove individual data frames as separate objects
objects_to_remove <- paste("df_", 1:739, sep = "")
rm(list = objects_to_remove, envir = .GlobalEnv)


 

5. Heatmaps of per-state ratings and averaged dissimilarity barplots

# Define colors for different states
point_col <- c(
  Baseline = "#2B2B2B",    
  Alc = "#8A99BF", #Alcohol      
  MJ = "#327D43",  #Marijuana         
  MDMA = "#7B3894",#MDMA        
  Amf = "#8B2B2B", #Amphetamine       
  LSD = "#5998BA", #LSD         
  Psy = "#5DADB3", #Psylocybine
  Mef = "#A23E71", #Mephedrone
  Coc = "#BF436E", #Cocaine
  Alp = "#6B86B0", #Aplrazolam
  Ket = "#505CB9", #Ketamine
  DMT = "#108BB8", #DMT
  N2O = "#6861C7", #Nitrous Oxide  
  DXM = "#7A65A8", #DXM
  Cod = "#ACA232", #Codeine
  Tra = "#AC845F", #Tramadol
  Her = "#755B28", #Heroine/Morphine
  Salv = "#AFA0BD", #Salvia Divinorum
  GHB = "#617991", #GHB
  `2CB` = "#6AA4BA"  #2CB
)

labels <- c(
  "Baseline" = "Baseline",
  "Alc" = "Alcohol",
  "MJ" = "Marijuana",
  "MDMA" = "MDMA",
  "Amf" = "Amphetamine",
  "LSD" = "LSD",
  "Psy" = "Psilocybin",
  "Mef" = "Mephedrone",
  "Coc" = "Cocaine",
  "Alp" = "Alprazolam",
  "Ket" = "Ketamine",
  "DMT" = "DMT",
  "N2O" = "Nitrous oxide",
  "DXM" = "DXM",
  "Cod" = "Codeine",
  "Tra" = "Tramadol",
  "Her" = "Heroin",
  "Salv" = "Salvia",
  "GHB" = "GHB/GBL",
  "Dat" = "Datura",
  "Ben" = "Benzydamine",
  "2CB" = "2C-B",
  "Diph" = "Diphenidine"
)

#Create heatmap of number of ratings for all comparisons
data_for_heatmap <- reshape2::melt(comparisons_n)

data_for_heatmap$Var1 <- factor(data_for_heatmap$Var1, levels = rev(c( "Baseline", "MJ","Alc", "MDMA", "Psy", "LSD", "Amf", "Coc", "Mef", "Alp", "Ket","2CB","N2O", "DMT","Cod", "Tra", "DXM","GHB", "Her", "Salv", "Ben","Dat", "Diph")))
data_for_heatmap$Var2 <- factor(data_for_heatmap$Var2, levels = c( "Baseline", "MJ","Alc", "MDMA", "Psy", "LSD", "Amf", "Coc", "Mef", "Alp", "Ket","2CB","N2O", "DMT","Cod", "Tra", "DXM","GHB", "Her", "Salv", "Ben","Dat", "Diph"))

ggplot(data_for_heatmap, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile() +
  scale_fill_gradientn(colours = c("white", "orange", "red"),
                       values = scales::rescale(c(0, 150, 150, 150, 800)))+
  geom_text(aes(label = value), vjust = 0.6, size = 2.5, colour = "#333333") + 
  labs(x = "", y = "") +
  theme_minimal() + 
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10, colour = "black" ),
    axis.text.y = element_text(size = 10, colour = "black"),
    legend.position = "none",
    plot.background = element_rect(fill = "white")) +
  scale_x_discrete(labels = labels) +  
  scale_y_discrete(labels = labels)
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## zwijanie do unikalnych wartości 'x'

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## zwijanie do unikalnych wartości 'x'

#Create heatmap of number of ratings for subset of substances

data_for_heatmap_filtered <- data_for_heatmap %>%
  filter(!(Var1 %in% c("Ben", "Dat", "Diph")) & !(Var2 %in% c("Ben", "Dat", "Diph")))

ggplot(data_for_heatmap_filtered, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile() +
  scale_fill_gradientn(colours = c("white", "orange", "red"),
                       values = scales::rescale(c(0, 150, 150, 150, 800)))+
  geom_text(aes(label = value), vjust = 0.6, size = 2.5, colour = "#333333") + 
  labs(x = "", y = "") +
  theme_minimal() + 
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10, colour = "black" ),
    axis.text.y = element_text(size = 10, colour = "black"),
    legend.position = "none",
    plot.background = element_rect(fill = "white")) +
  scale_x_discrete(labels = labels) +  
  scale_y_discrete(labels = labels)
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## zwijanie do unikalnych wartości 'x'

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## zwijanie do unikalnych wartości 'x'

# Create barplots of average disimilarity
dt_long <- reshape2::melt(dt, varnames = "group", value.name = "value")
colnames(dt_long)[2] <- 'plot'

labs <- c("Baseline", "Alc", "MJ", "MDMA", "Amf", "LSD", "Psy", "Mef", "Coc", 
          "Alp", "Ket", "DMT", "N2O", "DXM", "Cod", "Tra", "Her", "Salv", 
          "GHB", "2CB")

for (val in labs) {
  subset_data <- dt_long[dt_long$group == val, ]

  subset_data$plot <- factor(subset_data$plot, levels = c('Baseline', 'Alp', 'Alc', 'GHB', 'Tra', 'Cod', 'Her', 'Coc', 'Amf', 'Mef', 'MDMA', 'MJ', 'N2O', 'DXM', 'Ket', 'Salv', '2CB', 'Psy', 'LSD', 'DMT'))
  subset_data[is.nan(subset_data$value), "value"] <- 0
  subset_data$transformed_D <- (exp(subset_data$value/100) - 1)


  p=ggplot(subset_data, aes(x = plot, y = transformed_D, fill = plot)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  labs(title = labels[val], x = "", y = "Mean disimilarity") +
  scale_fill_manual(values = point_col) +
  theme_minimal() +
  theme(legend.position = "none",
        plot.background = element_rect(fill = "white"),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10, colour = "black"),
        axis.text.y = element_text(size = 10, colour = "black"))+
  scale_y_continuous(limits = c(exp(0)-1, exp(1)-1), 
                     breaks = exp(seq(0,1, by = 0.1))-1,
                     labels = c(0,10,20,30,40,50,60,70,80,90,100))+
  scale_x_discrete(labels = labels)
  print(p)                   
}

# Version with all plots
# Initialize an empty list to store plots
plot_list <- list()

# Loop through each value in 'labs'
for (val in labs) {
  subset_data <- dt_long[dt_long$group == val, ]
  subset_data$plot <- factor(subset_data$plot, levels = c('Baseline', 'Alp', 'Alc', 'GHB', 'Tra', 'Cod', 'Her', 'Coc', 'Amf', 'Mef', 'MDMA', 'MJ', 'N2O', 'DXM', 'Ket', 'Salv', '2CB', 'Psy', 'LSD', 'DMT'))
  subset_data[is.nan(subset_data$value), "value"] <- 0
  subset_data$transformed_D <- (exp(subset_data$value/100) - 1)
  
  # Create the ggplot object for the current subset of data
  p <- ggplot(subset_data, aes(x = plot, y = transformed_D, fill = plot)) +
    geom_bar(stat = "identity", position = "dodge", color = "black") +
    labs(title = labels[val], x = "", y = "Mean disimilarity") +
    scale_fill_manual(values = point_col) +
    theme_minimal() +
    theme(legend.position = "none",
          plot.background = element_rect(fill = "white"),
          axis.text.x = element_text(angle = 45, hjust = 1, size = 6, colour = "black"),
          axis.text.y = element_text(size = 6, colour = "black"))+
    scale_y_continuous(limits = c(exp(0)-1, exp(1)-1), 
                       breaks = exp(seq(0,1, by = 0.1))-1,
                       labels = c(0,10,20,30,40,50,60,70,80,90,100))+
    scale_x_discrete(labels = labels)
  
  # Append the plot to the list
  plot_list[[val]] <- p
}

desired_sequence <- c("Baseline", "Alp", "Alc", "GHB", "Tra", "Cod", "Her", "Coc", "Amf", "Mef", "MDMA", "MJ", "N2O", "DXM", "Ket", "Salv", "2CB", "Psy", "LSD", "DMT")

# Reorder the plot_list according to the desired sequence
plot_list_reordered <- plot_list[desired_sequence]

# Combine all the plots into one big plot
big_plot <- wrap_plots(plotlist = plot_list_reordered, nrow = 4, ncol = 5)

## 12 x 10 cm export
big_plot <- wrap_plots(plotlist = plot_list_reordered, nrow = 5, ncol = 4)
#print(big_plot)


#Create barplot - number of users of particular substances
columns_to_plot <- c("List.MJ", "List.Alc", "List.MDMA", "List.Psy", "List.LSD", "List.Amf", 
                      "List.Coc", "List.Mef", "List.Alp", "List.Ket", "List.2CB", "List.N2O",
                      "List.DMT", "List.Cod", "List.Tra", "List.DXM",  "List.GHB", "List.Her",
                      "List.Salv", "List.Ben", "List.Dat", "List.Diph")

data_for_plot <- data.frame(
  Variable = factor(columns_to_plot, levels = columns_to_plot),
  Count = sapply(df_filtered[, columns_to_plot], function(x) sum(x == "Yes"))
)

custom_labels <- c(
  "List.Alc" = "Alcohol",
  "List.MJ" = "Marijuana",
  "List.MDMA" = "MDMA",
  "List.Amf" = "Amphetamine",
  "List.LSD" = "LSD",
  "List.Psy" = "Psilocybin",
  "List.Mef" = "Mephedrone",
  "List.Coc" = "Cocaine",
  "List.Alp" = "Alprazolam",
  "List.Ket" = "Ketamine",
  "List.DMT" = "DMT",
  "List.N2O" = "Nitrous oxide",
  "List.DXM" = "DXM",
  "List.Cod" = "Codeine",
  "List.Tra" = "Tramadol",
  "List.Her" = "Heroine",
  "List.Salv" = "Salvia divinorum",
  "List.GHB" = "GHB/GBL",
  "List.Dat" = "Datura",
  "List.Ben" = "Benzydamine",
  "List.2CB" = "2C-B",
  "List.Diph" = "Diphenidine"
)

data_for_plot$Variable <- factor(data_for_plot$Variable, levels = columns_to_plot)

ggplot(data_for_plot, aes(x = Variable, y = Count, fill = Variable)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Count), vjust = -0.5, size = 3) +  
  labs(x = "",y = "Number of Users") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    legend.position = "none",
    plot.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 12)) +
  scale_x_discrete(labels = custom_labels) +
  scale_fill_manual(
    values = c(
      "List.Alc" = "#8A99BF",
      "List.MJ" = "#327D43",
      "List.MDMA" = "#7B3894",
      "List.Amf" = "#8B2B2B",
      "List.LSD" = "#5998BA",
      "List.Psy" = "#5DADB3",
      "List.Mef" = "#A23E71",
      "List.Coc" = "#BF436E",
      "List.Alp" = "#6B86B0",
      "List.Ket" = "#505CB9",
      "List.DMT" = "#108BB8",
      "List.N2O" = "#6861C7",
      "List.DXM" = "#7A65A8",
      "List.Cod" = "#ACA232",
      "List.Tra" = "#AC845F",
      "List.Her" = "#755B28",
      "List.Salv" = "#AFA0BD",
      "List.GHB" = "#617991",
      "List.Dat" = "#7DAB23",
      "List.Ben" = "#6B8E23",
      "List.2CB" = "#6AA4BA",
      "List.Diph" = "#6A5ACD"
    )
  )

#Create Barplots - confidence ratings
columns_to_plot <- c("BaselineConf", "MJConf", "AlcConf", "MDMAConf", "AmfConf", "LSDConf",
  "PsyConf", "MefConf",  "CocConf", "AlpConf", "KetConf", "DMTConf", "N2OConf", "DXMConf",
  "CodConf", "TraConf",  "HerConf", "SalvConf", "GHBConf", "2CBConf")

data_for_plot <- df_filtered[columns_to_plot]

data_for_plot_numeric <- data_for_plot %>%
  mutate(across(everything(), ~ as.numeric(as.character(.))))

means <- data_for_plot_numeric %>%
  summarise(across(everything(), ~ mean(., na.rm = TRUE)))

errors <- data_for_plot_numeric %>%
  summarise(across(everything(), ~ sd(., na.rm = TRUE)))

custom_labels <- c(
  "List.Alc" = "Alcohol",
  "List.MJ" = "Marijuana",
  "List.MDMA" = "MDMA",
  "List.Amf" = "Amphetamine",
  "List.LSD" = "LSD",
  "List.Psy" = "Psilocybin",
  "List.Mef" = "Mephedrone",
  "List.Coc" = "Cocaine",
  "List.Alp" = "Alprazolam",
  "List.Ket" = "Ketamine",
  "List.DMT" = "DMT",
  "List.N2O" = "Nitrous oxide",
  "List.DXM" = "DXM",
  "List.Cod" = "Codeine",
  "List.Tra" = "Tramadol",
  "List.Her" = "Heroine",
  "List.Salv" = "Salvia divinorum",
  "List.GHB" = "GHB/GBL",
  "List.Dat" = "Datura",
  "List.Ben" = "Benzydamine",
  "List.2CB" = "2C-B",
  "List.Diph" = "Diphenidine"
)

data_for_plot_graph_long <- pivot_longer(means, cols = everything(), names_to = "Substance", values_to = "Mean")
errors_long <- pivot_longer(errors, cols = everything(), names_to = "Substance", values_to = "Error")
data_for_plot_graph_long <- left_join(data_for_plot_graph_long, errors_long, by = "Substance")


data_for_plot_graph_long <- data_for_plot_graph_long %>%
  filter(Substance %in% columns_to_plot) %>%
  mutate(Substance = factor(Substance, levels = columns_to_plot))

data_for_plot_graph_long$Substance <- sub("....$", "", data_for_plot_graph_long$Substance)

data_for_plot_graph_long$Substance <- factor(data_for_plot_graph_long$Substance, levels = c("Baseline", "MJ","Alc", "MDMA", "Psy", "LSD", "Amf", "Coc", "Mef", "Alp", "Ket","2CB","N2O", "DMT","Cod", "Tra", "DXM","GHB", "Her", "Salv"))

ggplot(data_for_plot_graph_long, aes(x = Substance, y = Mean, fill = Substance)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = Mean - Error, ymax = Mean + Error), width = 0.2) +
  scale_fill_manual(values = point_col) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Substance") +
  ylab("Confidence") +
  ggtitle("Average Confidence Ratings")+
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    legend.position = "none",
    plot.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 12))+
    scale_x_discrete(labels = labels)+
    ylim(0,7.5)

rm(subset_data, labs, dt_long, data_for_heatmap_filtered,data_for_heatmap)


# Sort levels in decreasing order of mean values
ordered_levels_dec <- data_for_plot_graph_long %>%
  group_by(Substance) %>%
  summarize(mean_value = mean(Mean)) %>%
  arrange(desc(mean_value)) %>%
  pull(Substance)

# Create bar plots with sorted levels (decreasing)
ggplot(data_for_plot_graph_long, aes(x = factor(Substance, levels = ordered_levels_dec), y = Mean, fill = Substance)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = Mean - Error, ymax = Mean + Error), width = 0.2) +
  scale_fill_manual(values = point_col) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Substance") +
  ylab("Confidence") +
  ggtitle("Average Confidence Ratings") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    legend.position = "none",
    plot.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 12)) +
  scale_x_discrete(labels = labels) +
  ylim(0, 7.5)


  6. Stress values with increasing dimensionality

# Initialize a vector to store stress values
stress_full <- numeric(10)

# Loop over dimensions
for (i in 1:10) {
  # Create "full" model with ndim = i
  mf <- mds(dt, ndim = i, type = "ordinal", init = "torgerson") # full data
  
  # Save stress values in respective vectors
  stress_full[i] <- mf$stress
}

# Set up the layout for 1x1 grid of plots
par(mfrow = c(1, 1))

## Create a plot with stress values (all states)
stress_full <- stress_full
dimensions <- 1:10
d_plot <- data.frame(Dimensions = dimensions, Stress = stress_full)

ggplot(d_plot, aes(x = Dimensions, y = Stress)) +
  geom_line(size = 1, color = "black") +
  geom_point(size = 3, color = "black") +
  ylim(0, 0.42) +
  geom_hline(yintercept = 0.1, linetype = "dashed", color = "#5A95D2FF", size = 0.8) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "#29AF7FFF", size = 0.8) +
  scale_x_continuous(breaks = 1:10) +
  labs(
    title = " ",
    x = "Number of Dimensions",
    y = "Stress"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 16),  # Set a uniform size for all text elements
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title = element_text(),
    axis.text = element_text(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, size = 1)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Define the new order (for visualization purposes)
states_order <- c('Baseline', 'Alp', 'Alc', 'GHB', 'Tra', 'Cod', 'Her', 'Coc', 'Amf', 'Mef', 'MDMA', 'MJ', 'N2O', 'DXM', 'Ket', 'Salv', '2CB', 'Psy', 'LSD', 'DMT')

# Create a new empty data frame with the desired order
dtr <- data.frame(matrix(NA, nrow = length(states_order), ncol = length(states_order)))
rownames(dtr) <- states_order
colnames(dtr) <- states_order

# Fill the new data frame with reordered values
for (i in states_order) {
  for (j in states_order) {
    if (i %in% rownames(dt) && j %in% colnames(dt)) {
      dtr[i, j] <- dt[i, j]
    }
  }
}

# Ensure the result is a data frame
dtr <- as.data.frame(dtr)

# define colors for reversed order
point_col_ord <- c(
  Baseline = "#2B2B2B",
  Alp = "#6B86B0", #Alprazolam
  Alc = "#8A99BF", #Alcohol
  GHB = "#617991", #GHB
  Tra = "#AC845F", #Tramadol
  Cod = "#ACA232", #Codeine
  Her = "#755B28", #Heroin/Morphine
  Coc = "#BF436E", #Cocaine
  Amf = "#8B2B2B", #Amphetamine
  Mef = "#A23E71", #Mephedrone
  MDMA = "#7B3894",#MDMA
  MJ = "#327D43",  #Marijuana
  N2O = "#6861C7", #Nitrous Oxide
  DXM = "#7A65A8", #DXM
  Ket = "#505CB9", #Ketamine
  Salv = "#AFA0BD",#Salvia Divinorum
  `2CB` = "#6AA4BA",#2C-B
  Psy = "#5DADB3", #Psilocybin
  LSD = "#5998BA", #LSD
  DMT = "#108BB8"  #DMT
)

##  Visualize stress contribution by each state across n-dimensional models
m2 <- mds(dtr, ndim = 2, type = "ordinal", init = "torgerson") 
m2 <- m2$spp
m3 <- mds(dtr, ndim = 3, type = "ordinal", init = "torgerson") 
m3 <- m3$spp
m4 <- mds(dtr, ndim = 4, type = "ordinal", init = "torgerson") 
m4 <- m4$spp
m5 <- mds(dtr, ndim = 5, type = "ordinal", init = "torgerson")
m5 <- m5$spp
m6 <- mds(dtr, ndim = 6, type = "ordinal", init = "torgerson") 
m6 <- m6$spp
m7 <- mds(dtr, ndim = 7, type = "ordinal", init = "torgerson") 
m7 <- m7$spp
dt_stress_contribution <- cbind(m2, m3, m4, m5, m6, m7)


# Set up the plot area and margins
par(mar = c(5, 5, 4, 12), xpd = TRUE)  # Increase right margin even more for legend

# Create color palette (adjust n to match your number of substances)
n <- nrow(dt_stress_contribution)

# Create an empty plot
plot(NULL, xlim = c(1, ncol(dt_stress_contribution)), ylim = range(dt_stress_contribution), 
     xlab = "n-Dimensional Model", ylab = "Stress contribution", 
     xaxt = "n", cex.lab = 1.2, cex.axis = 1.2, cex.main = 1.4)

# Add custom x-axis
axis(1, at = 1:ncol(dt_stress_contribution), labels = 1:ncol(dt_stress_contribution), cex.axis = 1.2)

# Add grid
grid(nx = ncol(dt_stress_contribution), ny = NULL, col = "lightgray", lty = "dotted")

# Iterate through each row (substance)
for (i in 1:nrow(dt_stress_contribution)) {
  lines(1:ncol(dt_stress_contribution), dt_stress_contribution[i,], col = point_col_ord[i], type = "l", lwd = 2)
}

# Add legend with smaller text and more columns, moved further right
legend("topright", legend = rownames(dt_stress_contribution), col = point_col_ord, 
       lty = 1, lwd = 2, cex = 0.7, ncol = ceiling(n/25), 
       inset = c(-0.55, 0), title = "States")

##  Visualize stress contribution by each state across n-dimensional models (2nd solution)
stress_full <- numeric(7)

# Loop over dimensions
for (i in 1:7) {
  # Create "full" model with ndim = i
  mf <- mds(dtr, ndim = i, type = "ordinal", init = "torgerson") # full data
  
  # Create "substances only" model with ndim = i
  # Save stress values in respective vectors
  stress_full[i] <- mf$stress
}

stress_full
## [1] 0.40354871 0.13274379 0.08440043 0.05285179 0.03709826 0.03032373 0.02189808
par(mfrow=c(1,1))
# Create the stacked bar plot with specified colors
barplot(as.matrix(dt_stress_contribution), beside = TRUE, col = point_col_ord, main = "", xlab = "n-Dimensional Models", ylab = "Stress contribution")

# Add legend with smaller text and more columns, moved further right
legend("topright", legend = rownames(dt_stress_contribution), col = point_col_ord, 
       lty = 1, lwd = 2, cex = 0.7, ncol = ceiling(n/25), 
       inset = c(-0.55, 0), title = "States")

# Removed redundant objects
rm(mf)
rm(d_plot)
rm(dt_stress_contribution)
rm(list = paste0("m", 2:7))


  7. 2D Multidimensional scaling

## Perform 2D Multidimensional Scaling
model_2d <- mds(dt, ndim = 2, type = "ordinal", init = "torgerson")

# Calculate and display stress value
stress_val = round(model_2d$stress,4)
stress_val
## [1] 0.1327
## Visualizations

# Set up plotting parameters
par(mfrow = c(1, 1))

# Extract state names from data
states <- rownames(dt)

# Use stress per point directly
sizes_o <- model_2d$spp

# Apply square-root transformation to point sizes
sizes_o <- sqrt(sizes_o)

# Normalize point sizes for better visibility
min_range <- 4
max_range <- 9
sizes_o <- ((sizes_o - min(sizes_o)) / (max(sizes_o) - min(sizes_o))) * (max_range - min_range) + min_range

## Optional rotation
coordinates <- model_2d$conf
rotation = T
angle = -35 

if (rotation == T) {
# Function to create a rotation matrix
create_rotation_matrix <- function(angle_degrees) {
  angle_radians <- angle_degrees * pi / 180
  matrix(c(cos(angle_radians), -sin(angle_radians),
           sin(angle_radians), cos(angle_radians)), 
         nrow = 2, ncol = 2)}

# Apply rotation to your MDS coordinates
rotate_coords <- function(coords, angle_degrees) {
  rotation_matrix <- create_rotation_matrix(angle_degrees)
  rotated <- as.matrix(coords) %*% rotation_matrix
  result <- as.data.frame(rotated)
  colnames(result) <- colnames(coords)  # Preserve original column names
  return(result)}

# Rotate your MDS coordinates (change the angle)
rotated_coords <- rotate_coords(model_2d$conf, angle)
coordinates <- rotated_coords
}

# Create labeled MDS plot
ggplot(data = as.data.frame(coordinates), aes(x = D1, y = D2)) +
  geom_point(color = alpha(point_col, 0.5), size = sizes_o) + 
  geom_point(color = point_col, size = 2) +                       
  geom_text(aes(label = rownames(model_2d$conf)), hjust = -0.5, vjust = -0.5, size = 3) +
  labs(title = "MDS Analysis", x = "Dimension 1", y = "Dimension 2") +
  coord_fixed() +
  xlim(-0.8, 1.1) + 
  ylim(-1, 0.85) + 
  theme_minimal() +
  theme(legend.position = "none")

# Create unlabeled MDS plot
ggplot(data = as.data.frame(coordinates), aes(x = D1, y = D2)) +
  geom_point(color = alpha(point_col, 0.5), size = sizes_o) + 
  geom_point(color = point_col, size = 2) +                       
  labs(title = "MDS Analysis", x = "Dimension 1", y = "Dimension 2") +
  coord_fixed() +
  xlim(-0.8, 1.1) + 
  ylim(-1, 0.85) +
  theme_minimal() +
  theme(legend.position = "none")

par(mfrow = c(1,1))


 

8. 3D Multidimensional scaling

## Perform 3D Multidimensional Scaling
model3d <- mds(dt, ndim = 3, type = "ordinal", init = "torgerson")
# Calculate and display stress value
stress_val = round(model3d$stress,4)
stress_val
## [1] 0.0844
## Visualization 
## Create 2D mappings for 3D model
# Set up the plotting area and adjust margins
par(mfrow = c(1,3), mar = c(4, 4, 2, 1), oma = c(0, 0, 2, 0))
# Plot the three configurations
plot(model3d, plot.dim = c(1,2), main = "", col = point_col, pch = 19, cex = 1.2)
title("D1 vs. D2", line = 0.5)
plot(model3d, plot.dim = c(1,3), main = "", col = point_col, pch = 19, cex = 1.2)
title("D1 vs. D3", line = 0.5)
plot(model3d, plot.dim = c(2,3), main = "", col = point_col, pch = 19, cex = 1.2)
title("D2 vs. D3", line = 0.5)
# Add an overall title
mtext("MDS Mappings - 3D model", outer = TRUE, cex = 1.2)

### Create an interactive 3D scatter plot with stress-wise sizing of individual points
par(mfrow = c(1,1))
# Extract state-stress contribution to visualize it as the size of point
sizes_o <- model3d$spp
# Apply square-root transformation to point sizes
sizes_o <- sqrt(sizes_o)
# Normalize point sizes for better visibility
min_range <- 6
max_range <- 16  # Adjust these values as needed
sizes_o <- ((sizes_o - min(sizes_o)) / (max(sizes_o) - min(sizes_o))) * (max_range - min_range) + min_range

## Create an interactive 3D scatter plot using plotly
par(mfrow = c(1,1))
# Create data frame with dimensions and sizes
labels <- rownames(model3d$conf)

# Ensure sizes_o is in the correct order
names(sizes_o) <- labels
sizes_o <- sizes_o[names(point_col_ord)]

# Create a factor with levels in the desired order
ordered_labels <- factor(labels, levels = names(point_col_ord))

mds_df <- data.frame(
  Dim1 = model3d$conf[, 1],
  Dim2 = model3d$conf[, 2],
  Dim3 = model3d$conf[, 3],
  labels = labels,
  sizes = sizes_o,
  labels_factor = ordered_labels
)

# Calculate axis ranges
axis_ranges <- apply(model3d$conf, 2, function(x) c(min(x), max(x)))
max_range <- max(abs(axis_ranges))
axis_limits <- c(-max_range, max_range)

# Create the plot
p <- plot_ly()
# Add larger, semi-transparent points
p <- p %>% add_trace(
  data = mds_df,
  x = ~Dim1, y = ~Dim2, z = ~Dim3,
  color = ~labels_factor,
  colors = point_col_ord,
  type = 'scatter3d',
  mode = 'markers',
  marker = list(size = ~sizes, sizemode = 'diameter', opacity = 0.3),
  showlegend = FALSE
)
# Add smaller, fully opaque points
p <- p %>% add_trace(
  data = mds_df,
  x = ~Dim1, y = ~Dim2, z = ~Dim3,
  color = ~labels_factor,
  colors = point_col_ord,
  type = 'scatter3d',
  mode = 'markers',
  marker = list(size = 5, sizemode = 'diameter'),
  name = ~labels_factor
)
# Add text labels
p <- p %>% add_text(
  data = mds_df,
  x = ~Dim1, y = ~Dim2, z = ~Dim3,
  text = ~labels,
  textposition = "bottom center",
  showlegend = FALSE,
  color = I("black")
)
# Edit layout
p <- p %>% layout(
  scene = list(
    xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    aspectmode = "cube"
  ),
  title = paste("3D MDS (Stress =", round(model3d$stress,3), ")"),
  legend = list(
    traceorder = "normal",
    itemsizing = "constant",
    font = list(size = 12)
  )
)
# Print the plot
p
### Create an interactive 3D scatter plot with equally sized points

p <- plot_ly(
  data = mds_df,
  x = ~Dim1, y = ~Dim2, z = ~Dim3,
  color = ~labels_factor,
  colors = point_col_ord,
  type = 'scatter3d',
  mode = 'markers')
# Add text labels next to the data points with black color
p <- p %>% add_text(
  text = ~labels, ### OPTIONAL:labels_full (datapoints)
  textposition = "bottom center",
  showlegend = FALSE,
  color = I("black"))
# Edit layout
p <- p %>% layout(
  scene = list(
    xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    aspectmode = "cube"
  ),
  title = paste("3D MDS (Stress =", round(model3d$stress,3), ")"),
  legend = list(
    traceorder = "normal",
    itemsizing = "constant"
  )
)
# Print the plot
p
## Create an interactive 3D scatter plot using full labels (legend to be used in the paper)
labels <- c(
  "Baseline" = "Baseline",
  "Alc" = "Alcohol",
  "MJ" = "Marijuana",
  "MDMA" = "MDMA",
  "Amf" = "Amphetamine",
  "LSD" = "LSD",
  "Psy" = "Psilocybin",
  "Mef" = "Mephedrone",
  "Coc" = "Cocaine",
  "Alp" = "Alprazolam",
  "Ket" = "Ketamine",
  "DMT" = "DMT",
  "N2O" = "Nitrous oxide",
  "DXM" = "DXM",
  "Cod" = "Codeine",
  "Tra" = "Tramadol",
  "Her" = "Heroin/Morphine",
  "Salv" = "Salvia",
  "GHB" = "GHB/GBL",
  "2CB" = "2C-B"
)

# Create dtf as a copy of dt
dtf <- dt

# Update column names
colnames(dtf) <- sapply(colnames(dtf), function(x) ifelse(x %in% names(labels), labels[x], x))

# Update row names if they exist
if (!is.null(rownames(dtf))) {
  rownames(dtf) <- sapply(rownames(dtf), function(x) ifelse(x %in% names(labels), labels[x], x))
}

# If there's a specific column containing these labels (let's say it's called 'substance'), update it
if ('substance' %in% colnames(dtf)) {
  dtf$substance <- sapply(dtf$substance, function(x) ifelse(x %in% names(labels), labels[x], x))
}

dtf <- as.data.frame(dtf)

## Full labels-colors 
point_col_ord_full <- c(
  Baseline = "#2B2B2B",
  Alprazolam = "#6B86B0",
  Alcohol = "#8A99BF",
  "GHB/GBL" = "#617991",
  Tramadol = "#AC845F",
  Codeine = "#ACA232",
  "Heroin/Morphine" = "#755B28",
  Cocaine = "#BF436E",
  Amphetamine = "#8B2B2B",
  Mephedrone = "#A23E71",
  MDMA = "#7B3894",
  Marijuana = "#327D43",
  "Nitrous oxide" = "#6861C7",
  DXM = "#7A65A8",
  Ketamine = "#505CB9",
  Salvia = "#AFA0BD",
  "2C-B" = "#6AA4BA",
  Psilocybin = "#5DADB3",
  LSD = "#5998BA",
  DMT = "#108BB8"
)

# Create data frame with dimensions
model3d <- mds(dtf, ndim = 3, type = "ordinal", init = "torgerson")
labels <- colnames(dtf)
mds_df <- data.frame(
  Dim1 = model3d$conf[, 1],
  Dim2 = model3d$conf[, 2],
  Dim3 = model3d$conf[, 3],
  labels = labels)
mds_df$labels_factor <- factor(labels, levels = names(point_col_ord_full)) #levels affects legend only

# Calculate axis ranges
axis_ranges <- apply(model3d$conf, 2, function(x) c(min(x), max(x)))
max_range <- max(abs(axis_ranges))
axis_limits <- c(-max_range, max_range)

p <- plot_ly(
  data = mds_df,
  x = ~Dim1, y = ~Dim2, z = ~Dim3,
  color = ~labels_factor,
  colors = point_col_ord_full,
  type = 'scatter3d',
  mode = 'markers')
# Add text labels next to the data points with black color
p <- p %>% add_text(
  text = ~labels, ### OPTIONAL:labels_full (datapoints)
  textposition = "bottom center",
  showlegend = FALSE,
  color = I("black"))
# Edit layout
p <- p %>% layout(
  scene = list(
    xaxis = list(title = 'Dim1', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    yaxis = list(title = 'Dim2', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    zaxis = list(title = 'Dim3', titlefont = list(size = 16, family = "Arial", weight = "bold"), range = axis_limits),
    aspectmode = "cube"
  ),
  title = paste("3D MDS (Stress =", round(model3d$stress,3), ")"),
  legend = list(
    traceorder = "normal",
    itemsizing = "constant"
  )
)
# Print the plot
p
## Alternative 3D visualization (scatterplot3D)
par(mfrow = c(1,1))
setupKnitr(autoprint = TRUE)

## Perform 3D Multidimensional Scaling
model3d <- mds(dt, ndim = 3, type = "ordinal", init = "torgerson")

labels <- rownames(model3d$conf)
mds_df <- data.frame(
  Dim1 = model3d$conf[, 1],
  Dim2 = model3d$conf[, 2],
  Dim3 = model3d$conf[, 3],
  labels = labels
)

# Calculate axis ranges
axis_ranges <- apply(model3d$conf, 2, function(x) c(min(x), max(x)))
max_range <- max(abs(axis_ranges))
axis_limits <- c(-max_range, max_range)

# Create a 3D scatter plot using scatterplot3d
p1 <- scatterplot3d(mds_df$Dim1, mds_df$Dim2, mds_df$Dim3, 
                    color = point_col[mds_df$labels], 
                    type = "h", pch = 19, 
                    xlab = "Dim1", ylab = "Dim2", zlab = "Dim3", 
                    main = "Dim1 vs Dim2 vs Dim3",
                    xlim = axis_limits, ylim = axis_limits, zlim = axis_limits,
                    scale.y = 1, angle = 60)
text(p1$xyz.convert(mds_df$Dim1, mds_df$Dim2, mds_df$Dim3), labels = mds_df$labels, pos = 4, cex = 0.6)

# Create a 3D scatter plot using scatterplot3d with dimensions swapped
p2 <- scatterplot3d(mds_df$Dim3, mds_df$Dim1, mds_df$Dim2, 
                    color = point_col[mds_df$labels], 
                    type = "h", pch = 19, 
                    xlab = "Dim3", ylab = "Dim1", zlab = "Dim2", 
                    main = "Dim3 vs Dim1 vs Dim2",
                    xlim = axis_limits, ylim = axis_limits, zlim = axis_limits,
                    scale.y = 1, angle = 60)
text(p2$xyz.convert(mds_df$Dim3, mds_df$Dim1, mds_df$Dim2), labels = mds_df$labels, pos = 4, cex = 0.6)

# Create a 3D scatter plot using scatterplot3d with dimensions swapped
p3 <- scatterplot3d(mds_df$Dim1, mds_df$Dim3, mds_df$Dim2, 
                    color = point_col[mds_df$labels], 
                    type = "h", pch = 19, 
                    xlab = "Dim1", ylab = "Dim3", zlab = "Dim2", 
                    main = "Dim1 vs Dim3 vs Dim2",
                    xlim = axis_limits, ylim = axis_limits, zlim = axis_limits,
                    scale.y = 1, angle = 60)
text(p3$xyz.convert(mds_df$Dim1, mds_df$Dim3, mds_df$Dim2), labels = mds_df$labels, pos = 4, cex = 0.6)


 

9. Baseline-centered polar coordinates model

# Data preparation for circular unfolding analysis
dt_polar <- dt
dt_polar[is.na(dt_polar)] <- 0
dt_polar <- dt_polar[-1,-1]

# Color scheme for states
point_col_radial <- c(
  Alc = "#8A99BF", MJ = "#327D43", MDMA = "#7B3894",
  Amf = "#8B2B2B", LSD = "#5998BA", Psy = "#5DADB3",
  Mef = "#A23E71", Coc = "#BF436E", Alp = "#6B86B0",
  Ket = "#505CB9", DMT = "#108BB8", N2O = "#6861C7",
  DXM = "#7A65A8", Cod = "#ACA232", Tra = "#AC845F",
  Her = "#755B28", Salv = "#AFA0BD", GHB = "#617991",
  `2CB` = "#6AA4BA"
)

# Perform circular unfolding
circ_model <- unfolding(dt_polar, itmax = 10000, circle = "column", type = "ordinal", init = "torgerson")

# Calculate and display stress value
stress_val = round(circ_model$stress,4)
stress_val
## [1] 0.181
## Configurate Circular Unfolding plot
# Get the original coordinates
conf_col_jittered <- circ_model$conf.col

# Adjust coordinates for overlapping points (for visibility puroposes, only this graph)
conf_col_jittered["Tra", ] <- conf_col_jittered["Tra", ] + c(0.01, 0.0)
conf_col_jittered["Cod", ] <- conf_col_jittered["Cod", ] + c(-0.024, -0.01)
conf_col_jittered["Ket", ] <- conf_col_jittered["Ket", ] + c(-0.035, 0.01)
conf_col_jittered["Amf", ] <- conf_col_jittered["Amf", ] + c(-0.04, 0.0)
conf_col_jittered["Alp", ] <- conf_col_jittered["Alp", ] + c(-0.01, 0.01)
conf_col_jittered["Psy", ] <- conf_col_jittered["Psy", ] + c(-0.02, -0.01)
conf_col_jittered["LSD", ] <- conf_col_jittered["LSD", ] + c(0.015, 0.02)

# Create a custom plotting function
custom_plot <- function(x, ...) {
  plot(x, ...)
  points(conf_col_jittered, col = point_col_radial, pch = 19, cex = 1.5)
}

# Visualize unfolding results with manually jittered points
custom_plot(circ_model, 
            main = "Restricted Circular Unfolding", 
            col.rows = "white", 
            col.columns = "white",  # Set to white to hide original points
            label.conf.rows = list(label = FALSE), 
            cex = 2, 
            label.conf.columns = list(label = F, pos = 3, col = "black", cex = 0.77))

## Prepare Polar coordinates plot
full_names <- c(
  Alc = "Alcohol", MJ = "Marijuana", MDMA = "MDMA",
  Amf = "Amphetamine", LSD = "LSD", Psy = "Psilocybin",
  Mef = "Mephedrone", Coc = "Cocaine", Alp = "Alprazolam",
  Ket = "Ketamine", DMT = "DMT", N2O = "Nitrous Oxide",
  DXM = "DXM", Cod = "Codeine", Tra = "Tramadol",
  Her = "Heroin", Salv = "Salvia", GHB = "GHB",
  `2CB` = "2CB")

# Extract coordinates and calculate angles
x <- circ_model$conf.col[, 1]
y <- circ_model$conf.col[, 2]
angles <- atan2(y, x)
angles_degrees <- angles * (180 / pi)

# Adjust angles for overlapping points for visibility purposes
jittered_angles <- angles_degrees
jittered_angles["Cod"] <- jittered_angles["Cod"] + 3
jittered_angles["DXM"] <- jittered_angles["DXM"] + 2
jittered_angles["Ket"] <- jittered_angles["Ket"] - 2
jittered_angles["Amf"] <- jittered_angles["Amf"] + 3
jittered_angles["Psy"] <- jittered_angles["Psy"] - 1.5
jittered_angles["LSD"] <- jittered_angles["LSD"] + 1.5
# ... continue for other substances as needed

# Create data frame with angular positions
states <- rownames(circ_model$conf.col)
angle_data <- data.frame(State = states, Angle_Radians = angles, Angle_Degrees = jittered_angles)
print(angle_data)
##      State Angle_Radians Angle_Degrees
## Alc    Alc     2.8451644     163.01591
## MJ      MJ    -2.2430961    -128.51994
## MDMA  MDMA     0.7577971      43.41858
## Amf    Amf     1.3165642      78.43357
## LSD    LSD    -0.2754239     -14.28063
## Psy    Psy    -0.2864216     -17.91075
## Mef    Mef     1.2470805      71.45245
## Coc    Coc     1.2994524      74.45314
## Alp    Alp    -2.9410997    -168.51260
## Ket    Ket    -1.9475182    -113.58457
## DMT    DMT    -0.8212646     -47.05499
## N2O    N2O    -1.8216628    -104.37359
## DXM    DXM    -1.9356109    -108.90234
## Cod    Cod    -2.8601184    -160.87271
## Tra    Tra    -2.8958027    -165.91727
## Her    Her    -2.7877533    -159.72650
## Salv  Salv    -1.7472124    -100.10790
## GHB    GHB    -3.0402974    -174.19621
## 2CB    2CB     0.0978737       5.60775
## Prepare data for polar-coordinate plot
unique_state_codes <- rownames(dt)
plot_data <- data.frame(State = unique_state_codes[-1], Dissimilarity = dt[unique_state_codes[-1], "Baseline"])
plot_data$AngularPosition <- jittered_angles
plot_data$Dissimilarity_fraction <- plot_data$Dissimilarity / 100
plot_data$D <- exp(plot_data$Dissimilarity_fraction)

## OPTIONAL CHOICES
show_labels <- FALSE
use_full_names <- FALSE 
labels_to_right <- FALSE

# Add full names to plot_data
plot_data$FullName <- full_names[plot_data$State]

# Create plotly polar plot
fig <- plot_ly(
  data = plot_data,
  type = 'scatterpolargl',
  r = plot_data$D,
  theta = plot_data$AngularPosition,
  mode = if(show_labels) 'markers+text' else 'markers',
  marker = list(
    size = 18,
    line = list(color = '#FFF'),
    opacity = 0.75,
    color = point_col_radial,
    colorscale = list(colors = as.list(point_col_radial))
  )
)

# Add text only if show_labels is TRUE
if(show_labels) {
  fig <- fig %>% add_trace(
    text = if(use_full_names) plot_data$FullName else plot_data$State,
    textposition = if(labels_to_right) 'middle right' else 'auto',
    textfont = list(size = 13)
  )
}

# Configure plot layout
# Configure plot layout
# Configure plot layout
fig <- layout(fig,
  polar = list(
    radialaxis = list(
      rangemode = 'tozero',
      range = c(exp(0), exp(1)),
      tickvals = exp(seq(0,1, by = 0.1)),
      ticktext = c(0,10,20,30,40,50,60,70,80,90,100),
      side = 'clockwise',
      showline = TRUE,
      linewidth = 2,
      tickwidth = 2,
      gridcolor = 'gray',
      gridwidth = 1,
      tickfont = list(size = 12),
      tickangle = 'auto',  # This keeps the labels vertical
      ticksuffix = ' ',  # Add a small space after tick labels
      tickmode = 'array',
      ticklabelposition = 'inside',
      ticklabelstep = 1
    ),
    angularaxis = list(
      tickwidth = 2,
      linewidth = 3,
      layer = 'below traces',
      tickfont = list(size = 12)
    )
  ),
  title = "",
  paper_bgcolor = "white",
  bgcolor = "white"
)

# Add legend and display plot
fig <- fig %>% layout(legend = list(x = 0.9, y = 0.9))
fig
## Warning: 'layout' objects don't have these attributes: 'bgcolor'
## Valid attributes include:
## '_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
# Clean up workspace
#rm(circ_model, fig, plot_data)


 

10. Prepare data for python-based models - additional exclusions

## Perform exclusions of subjects with stereotypical response patterns (overuse of the maximal value)

# Initialization of empty arrays
resp_num <- matrix(0, nrow = ncol(list_df[[1]]), ncol = ncol(list_df[[1]]))
temp_m <- resp_num

resp_ratio= matrix(0, nrow = length(list_df), 1)

# Calculate the proportion
for (i in 1:length(list_df)) {
  df <- list_df[[i]]
  
  resp_num[!is.na(df)] <- resp_num[!is.na(df)] + 1
  temp_m[!is.na(df)] <- temp_m[!is.na(df)] + as.numeric(df[!is.na(df)])
  
  all_numeric_values <- as.vector(unlist(df, use.names = FALSE))
  numeric_values_no_na <- as.numeric(all_numeric_values[!is.na(all_numeric_values)])
  
  resp_ratio[i] = sum(numeric_values_no_na == 100)/length(numeric_values_no_na)

}

# Remove redundant objects
rm(resp_num)
rm(temp_m)

# Rename variables
resp_ratio <- as.data.frame(resp_ratio)
names(resp_ratio)[names(resp_ratio) == "V1"] <- "Proportion"
resp_ratio$ID <- c(1:length(resp_ratio$Proportion))

# Replace zeroes with minimal values (only positive values required)
resp_ratio$Proportion[resp_ratio$Proportion == 0] <- resp_ratio$Proportion[resp_ratio$Proportion == 0] + 0.01

## Determine the cut-off threshold using default settings and lognormal distribution
out_lim <- getOutliersII(resp_ratio$Proportion, distribution = "lognormal") 
cutoff_right <- out_lim$limit[2]
cutoff_right
##     Right 
## 0.6895309
hist(resp_ratio$Proportion, main = "Proportion of the maximal value", breaks = 40, xlim = c(0,1), ylim = c(0,120))
abline(v=cutoff_right, col = "darkred")

count_higher_than <- sum(resp_ratio$Proportion > cutoff_right)
count_higher_than
## [1] 29
## Filter individual subjects based on the cut-off criterion
resp_ratio$Inclusion[resp_ratio$Proportion < cutoff_right] <- T
resp_ratio$Inclusion[resp_ratio$Proportion > cutoff_right] <- F
include <- resp_ratio$ID[resp_ratio$Inclusion == T]
filtered_list_df <- list_df[include]

#Basic demographic statistics after exclusions
df_after_exclusions <- df_filtered %>% filter(ID %in% include)
df_after_exclusions %>%
  summarise(
    Number_of_Participants = n(),
    Mean_Age = mean(Age, na.rm = TRUE),
    SD_Age = sd(Age, na.rm = TRUE),
    Male_Participants = sum(Gender == "Man"),
    Female_Participants = sum(Gender == "Woman"),
    Other_Participants = sum(Gender == "Other")
  ) %>%
  print()
##   Number_of_Participants Mean_Age  SD_Age Male_Participants Female_Participants
## 1                    710 29.77465 8.09464               368                 328
##   Other_Participants
## 1                 14
MDS_ASC_additional_variables <- df_after_exclusions[c(1,4:7, 538:560, 562, 563, 594, 641)]
write.table(MDS_ASC_additional_variables, file = "MDS_ASC_additional_variables", sep = "\t")
save(filtered_list_df, file = "MDS_ASC_individual_matrices.RData")